/*LIS Cross-section Data center in Luxembourg*/

/*email: usersupport@lisdatacenter.org*/

/*LIS Self Teaching Package 2022*/
/*Part II: Gender, employment, and wages*/
/*SAS version*/

/*last change of this version of the syntax: 15-01-2022*/

 
/*Exercise 8: Pooled regressions and normalised weights*/


/* Session settings: quiet log (no notes, no source echo, no macro tracing), */
/* tolerate missing formats, and an unpaginated, left-aligned listing.       */
OPTIONS
  NONOTES NOSOURCE NOFMTERR
  NODATE NONUMBER NOCENTER LABEL
  LS=MAX PS=MAX
  NOMPRINT NOMLOGIC NOSYMBOLGEN
;

/* Blank out the first title line of the listing. */
TITLE "";

/*---------------------------------------------------------------------------
  Macro pool

  Purpose
    For every dataset code listed in the global macro variable ALL, build a
    person-level analysis file (household file merged onto the person file),
    derive the regressors, top- and bottom-code hourly wages, convert them
    with a PPP factor, and stack the results in WORK.CURRENT. Finally fit a
    weighted regression of log PPP hourly wage, separately by sex.

  Inputs
    ALL   blank-separated list of dataset codes (for example: us04 be04 gr04).
          For each code xx, the macro variables named xxh and xxp must resolve
          to the household-level and person-level datasets.
          (presumably supplied by the LIS remote-execution environment -
          TODO confirm)

  Outputs
    WORK.CURRENT (pooled data) and the PROC SURVEYREG listing.
    One WORK dataset per code (xx, xxh, xxp) and WORK.TEMP are left behind.

  NOTE(review): the exercise title mentions normalised weights, but the
  regression below uses ppopwgt unchanged, and pwgt is kept but never used.
  Confirm whether a per-dataset weight normalisation is intended.
  ---------------------------------------------------------------------------*/
%MACRO pool ;

  /* Keep all working macro variables out of the global symbol table. */
  %LOCAL i n ccyy b t ;

  /* Nothing to do (and COUNTW would fail) when the list is empty. */
  %IF %LENGTH(&all) = 0 %THEN
    %DO ;
      %PUT ERROR: macro variable all is empty - nothing to pool. ;
      %RETURN ;
    %END ;

  /* Loop over every entry of the list rather than a hard-coded count. */
  %LET n = %SYSFUNC(COUNTW(&all, %STR( ))) ;

  %DO i = 1 %TO &n ;

    %LET ccyy = %SCAN(&all, &i, %STR( )) ;

    /* ---- household file: only the tenure variable is needed ---- */
    DATA &ccyy.h ;
      SET &&&ccyy.h (KEEP=hid own) ;
    RUN ;
    PROC SORT DATA=&ccyy.h ;
      BY hid ;
    RUN ;

    /* ---- person file ---- */
    DATA &ccyy.p ;
      SET &&&ccyy.p (KEEP=hid did dname pwgt ppopwgt relation partner ageyoch age sex immigr
                          educ educ_c emp status1 ptime1 hwage1) ;
    RUN ;
    PROC SORT DATA=&ccyy.p ;
      BY hid ;
    RUN ;

    /* ---- merge household onto persons and derive the regressors ---- */
    DATA &ccyy ;
      MERGE &ccyy.h &ccyy.p ;
      BY hid ;

      /* Sample restriction: age 25-54 and relation code up to 2200. */
      IF ((25 <= age <= 54) AND (relation LE 2200)) ;

      /* Tenure dummy; own codes outside 100-299 leave homeowner missing. */
      IF (100 <= own <= 199) THEN homeowner = 1 ;
      IF (200 <= own <= 299) THEN homeowner = 0 ;

      /* Age of youngest child: 1 = under 6, 2 = 6 to 17, 0 = anything else */
      /* (including a missing ageyoch).                                     */
      IF      (0 <= ageyoch < 6)  THEN achildcat = 1 ;
      ELSE IF (6 <= ageyoch < 18) THEN achildcat = 2 ;
      ELSE                             achildcat = 0 ;

      /* achildcat is always 0, 1 or 2 here, so plain 0/1 flags suffice. */
      youngchild = (achildcat = 1) ;
      oldchild   = (achildcat = 2) ;

      /* Employment-status dummy (derived but not used in the model below). */
      IF      (100 <= status1 <= 120) THEN depemp = 1 ;
      ELSE IF (200 <= status1 <= 240) THEN depemp = 0 ;
      ELSE                                 depemp = . ;

      agesq = age*age ;

      /* Education dummies; educ values other than 1, 2, 3 give missing. */
      IF      (educ in (1,3)) THEN mededuc = 0 ;
      ELSE IF (educ = 2)      THEN mededuc = 1 ;
      ELSE                         mededuc = . ;
      IF      (educ in (1,2)) THEN hieduc  = 0 ;
      ELSE IF (educ = 3)      THEN hieduc  = 1 ;
      ELSE                         hieduc  = . ;

      /* Dataset dummies for the pooled model. */
      belgium = 0 ;
      IF dname = 'be04' THEN belgium = 1 ;
      greece  = 0 ;
      IF dname = 'gr04' THEN greece = 1 ;
    RUN ;

    /* ---- hourly wage: drop missing, floor at zero, take logs ---- */
    DATA &ccyy ;
      SET &ccyy ;
      hourwage = hwage1 ;
      IF hourwage = . THEN DELETE ;
      IF (hourwage < 0) THEN hourwage = 0 ;
      /* Zero wages get a log of 0; guarding avoids evaluating LOG(0). */
      IF hourwage > 0 THEN hourwagelog = LOG(hourwage) ;
      ELSE                 hourwagelog = 0 ;
    RUN ;

    PROC SORT DATA=&ccyy ;
      BY did hourwagelog ;
    RUN ;

    /* ---- weighted quartiles of the log wage ---- */
    PROC UNIVARIATE DATA=&ccyy NOPRINT ;
      VAR hourwagelog ;
      WEIGHT ppopwgt ;
      OUTPUT OUT=temp P25=q25 P75=q75 ;
    RUN ;

    /* SYMPUTX keeps more digits than the BEST12. conversion of SYMPUT. */
    DATA _NULL_ ;
      SET temp ;
      CALL SYMPUTX("b", q25) ;
      CALL SYMPUTX("t", q75) ;
    RUN ;

    /* ---- top/bottom coding at 3 x IQR on the log scale, then PPP ---- */
    DATA &ccyy ;
      SET &ccyy ;
      /* Parentheses keep the expression valid when a quartile is negative. */
      iqr         = (&t) - (&b) ;
      upper_bound = (&t) + (iqr * 3) ;
      lower_bound = (&b) - (iqr * 3) ;
      IF hourwage > exp(upper_bound) THEN hourwage = exp(upper_bound) ;
      IF hourwage < exp(lower_bound) THEN hourwage = exp(lower_bound) ;

      /* Hard-coded PPP factors per dataset; any other dataset uses 1. */
      IF      dname = 'be04' THEN ppp = 0.86 ;
      ELSE IF dname = 'gr04' THEN ppp = 0.65 ;
      ELSE                        ppp = 1 ;

      hourwage_ppp = hourwage/ppp ;
      logwage_ppp  = LOG(hourwage_ppp) ;
    RUN ;

    /* ---- stack: first dataset starts the pool, the rest are appended ---- */
    %IF &i = 1 %THEN
      %DO ;
        DATA current ;
          SET &ccyy ;
        RUN ;
      %END ;
    %ELSE
      %DO ;
        PROC APPEND BASE=current DATA=&ccyy FORCE ;
        RUN ;
      %END ;

  %END ;

  /* ---- pooled regression, estimated separately for each sex ---- */
  PROC SORT DATA=current ;
    BY sex ;
  RUN ;

  PROC SURVEYREG DATA=current ;
    BY     sex ;
    WEIGHT ppopwgt ;
    MODEL  logwage_ppp = age agesq mededuc hieduc immigr partner youngchild oldchild
                         ptime1 homeowner belgium greece ;
  RUN ;

%MEND pool ;
/* Dataset codes to pool, separated by blanks. */
%LET all = us04 be04 gr04;

/* Run the pooled analysis on the list defined above. */
%pool
